## ============================================================================
## 02_si_s1_demographics.R
##
## Purpose:   Produce sample representativeness tables and in-text diagnostics
##            comparing survey samples to population benchmarks and leading
##            national surveys.
##            Corresponds to SI Appendix, Section S1.1, Tables S1-S5.
##
## Inputs:    combined_data.rds (via 00_setup.R),
##            aus_target.rds, aes_2022.rds, au_election_polls.rds,
##            usa_target.rds, anes_2024.rds, ces_2024.rds
## Outputs:   Tables/table_s1.tex  (Table S1: AU sample vs. AES comparison)
##            Tables/table_s2.tex  (Table S2: AU party support)
##            Tables/table_s3.tex  (Table S3: AU GCCSA regional distribution)
##            Tables/table_s4.tex  (Table S4: US sample vs. CES/ANES comparison)
##            Tables/table_s5.tex  (Table S5: US 7-point PID)
## ============================================================================

source("00_setup.R")

#### ============== Australia ============== ####

aus_target <- read_rds("aus_target.rds")
aes_dat    <- read_rds("aes_2022.rds")

aus_covs <-
  c("x_gender_f", "x_age_cat", "x_state", "x_edu_college", "x_native",
    "x_employ_binary", "x_hhi")

aes_covs <-
  c("x_gender_f", "x_age_cat", "x_state", "x_edu_college", "x_native",
    "x_employ_binary")

## Binary variables: levels displayed as "No" / "Yes" in tables
bin_vars <- c("x_gender_f", "x_edu_college", "x_native", "x_employ_binary")

## Generate survey weights for AU sample
aus_weighted <-
  combined_dat %>%
  filter(sample == "AU") %>%
  mutate(
    x_ideo_cat  = factor(x_ideo_cat,  levels = c("Left", "Moderate", "Right")),
    x_pid_comb2 = factor(x_pid_comb2, levels = c("Left", "Independent", "Right"))
  ) %>%
  harvest(., target = aus_target[aus_covs])

diagnosed_aus <-
  aus_weighted %>%
  diagnose_weights(target = aus_target[aus_covs])

diagnosed_aes <-
  aes_dat %>%
  diagnose_weights(target = aus_target[aes_covs])

## ---- Partisanship ----

aus_pid <-
  aus_weighted %>%
  filter(!is.na(x_pid_comb2)) %>%
  count(x_pid_comb2) %>%
  mutate(prop_original = n / sum(n))

aus_pid <-
  aus_weighted %>%
  filter(!is.na(x_pid_comb2)) %>%
  group_by(x_pid_comb2) %>%
  summarise(weighted_n = sum(weights, na.rm = TRUE)) %>%
  mutate(prop_weighted = weighted_n / sum(weighted_n)) %>%
  left_join(aus_pid, by = join_by(x_pid_comb2)) %>%
  mutate(variable = "x_pid_comb2", target = NA,
         error_original = NA, error_weighted = NA) %>%
  rename(level = x_pid_comb2) %>%
  select(variable, level, prop_original, prop_weighted, target,
         error_original, error_weighted)

aes_pid <-
  aes_dat %>%
  filter(!is.na(x_pid_comb2)) %>%
  count(x_pid_comb2) %>%
  mutate(prop_original = n / sum(n))

aes_pid <-
  aes_dat %>%
  filter(!is.na(x_pid_comb2)) %>%
  group_by(x_pid_comb2) %>%
  summarise(weighted_n = sum(weights, na.rm = TRUE)) %>%
  mutate(prop_weighted = weighted_n / sum(weighted_n)) %>%
  left_join(aes_pid, by = join_by(x_pid_comb2)) %>%
  mutate(variable = "x_pid_comb2", target = NA,
         error_original = NA, error_weighted = NA) %>%
  rename(level = x_pid_comb2) %>%
  select(variable, level, prop_original, prop_weighted, target,
         error_original, error_weighted)

## ---- Ideology ----

aus_ideo <-
  aus_weighted %>%
  filter(!is.na(x_ideo_cat)) %>%
  count(x_ideo_cat) %>%
  mutate(prop_original = n / sum(n))

aus_ideo <-
  aus_weighted %>%
  filter(!is.na(x_ideo_cat)) %>%
  group_by(x_ideo_cat) %>%
  summarise(weighted_n = sum(weights, na.rm = TRUE)) %>%
  mutate(prop_weighted = weighted_n / sum(weighted_n)) %>%
  left_join(aus_ideo, by = join_by(x_ideo_cat)) %>%
  mutate(variable = "x_ideo_cat", target = NA,
         error_original = NA, error_weighted = NA) %>%
  rename(level = x_ideo_cat) %>%
  select(variable, level, prop_original, prop_weighted, target,
         error_original, error_weighted)

aes_ideo <-
  aes_dat %>%
  filter(!is.na(x_ideo_cat)) %>%
  count(x_ideo_cat) %>%
  mutate(prop_original = n / sum(n))

aes_ideo <-
  aes_dat %>%
  filter(!is.na(x_ideo_cat)) %>%
  group_by(x_ideo_cat) %>%
  summarise(weighted_n = sum(weights, na.rm = TRUE)) %>%
  mutate(prop_weighted = weighted_n / sum(weighted_n)) %>%
  left_join(aes_ideo, by = join_by(x_ideo_cat)) %>%
  mutate(variable = "x_ideo_cat", target = NA,
         error_original = NA, error_weighted = NA) %>%
  rename(level = x_ideo_cat) %>%
  select(variable, level, prop_original, prop_weighted, target,
         error_original, error_weighted)

## ---- Table S1: Combine and export ----

aus_demos <-
  bind_rows(diagnosed_aus, aus_pid, aus_ideo) %>%
  mutate(source = "sample") %>%
  group_by(variable) %>%
  mutate(
    prop_original  = normalize(prop_original),
    prop_weighted  = normalize(prop_weighted),
    error_original = abs(target - prop_original),
    error_weighted = abs(target - prop_weighted)
  ) %>%
  ungroup()

aes_demos <-
  bind_rows(diagnosed_aes, aes_pid, aes_ideo) %>%
  mutate(source = "AES") %>%
  group_by(variable) %>%
  mutate(
    prop_original  = normalize(prop_original),
    prop_weighted  = normalize(prop_weighted),
    error_original = abs(target - prop_original),
    error_weighted = abs(target - prop_weighted)
  ) %>%
  ungroup()

## Export Table S1
aus_tab <-
  bind_rows(aus_demos, aes_demos) %>%
  pivot_wider(
    names_from  = source,
    values_from = c(prop_original, prop_weighted, error_original, error_weighted)
  ) %>%
  select(variable, level, target,
         prop_original_sample, error_original_sample,
         prop_original_AES, error_original_AES,
         prop_weighted_AES, error_weighted_AES) %>%
  mutate(
    level = case_when(
      variable %in% bin_vars & level == "0" ~ "No",
      variable %in% bin_vars & level == "1" ~ "Yes",
      level == "< $30,000"           ~ "Less than \\$30,000",
      level == "$30,000 - $49,999"   ~ "\\$30,000-\\$49,999",
      level == "$50,000 - $79,999"   ~ "\\$50,000-\\$79,999",
      level == "$80,000 - $99,999"   ~ "\\$80,000-\\$99,999",
      level == "$100,000 - $149,999" ~ "\\$100,000-\\$149,999",
      level == "$150,000 - $199,999" ~ "\\$150,000-\\$199,999",
      level == "$200,000 +"          ~ "\\$200,000 or more",
      .default = as.character(level)
    )
  ) %>%
  mutate(across(where(is.numeric), \(x) sprintf("%.2f", x))) %>%
  mutate(across(where(is.character), \(x) replace(x, x == "NA", "-")))

aus_tab %>%
  select(-variable) %>%
  xtable() %>%
  print(
    include.rownames       = FALSE,
    include.colnames       = FALSE,
    only.contents          = TRUE,
    type                   = "latex",
    sanitize.text.function = identity,
    comment                = FALSE,
    hline.after            = NULL,
    file = paste0(tab_dir, "table_s1.tex")
  )

## ---- Table S2: Party support comparison ----
aus_party <-
  aus_weighted %>%
  mutate(party = case_when(
    x_pid_short == "Labor"             ~ "Labor",
    x_pid_short == "Liberal/National"  ~ "Coalition",
    x_pid_short == "Greens"            ~ "Greens",
    .default = "Other"
  )) %>%
  filter(!is.na(party)) %>%
  count(party) %>%
  mutate(prop = n / sum(n), source = "Sample - Unweighted") %>%
  select(-n)

aes_party_uw <-
  aes_dat %>%
  mutate(party = case_when(
    x_pid_short == "Labor"             ~ "Labor",
    x_pid_short == "Liberal/National"  ~ "Coalition",
    x_pid_short == "Greens"            ~ "Greens",
    .default = "Other"
  )) %>%
  filter(!is.na(party)) %>%
  count(party) %>%
  mutate(prop = n / sum(n), source = "AES - Unweighted") %>%
  select(-n)

aes_party_w <-
  aes_dat %>%
  mutate(party = case_when(
    x_pid_short == "Labor"             ~ "Labor",
    x_pid_short == "Liberal/National"  ~ "Coalition",
    x_pid_short == "Greens"            ~ "Greens",
    .default = "Other"
  )) %>%
  filter(!is.na(party)) %>%
  group_by(party) %>%
  summarise(weighted_n = sum(weights, na.rm = TRUE)) %>%
  mutate(prop = weighted_n / sum(weighted_n), source = "AES - Weighted") %>%
  select(-weighted_n)

aus_polls <-
  read_rds("au_election_polls.rds") %>%
  pivot_longer(cols = coalition:other, names_to = "party", values_to = "prop") %>%
  mutate(party = str_to_title(party))

avg_polls <-
  aus_polls %>%
  group_by(party) %>%
  summarise(prop = sum(prop * sample_size) / sum(sample_size)) %>%
  mutate(source = "Polling averages")

elect_22 <-
  data.frame(
    party  = factor(c("Labor", "Coalition", "Greens", "Other")),
    prop   = c(0.33, 0.34, 0.12, 0.21),
    source = "2022 Election"
  )

elect_25 <-
  data.frame(
    party  = factor(c("Labor", "Coalition", "Greens", "Other")),
    prop   = c(0.35, 0.32, 0.12, 0.21),
    source = "2025 Election"
  )

party_tab <-
  bind_rows(elect_22, elect_25, aus_party, aes_party_w, aes_party_uw, avg_polls) %>%
  mutate(party = factor(party, levels = c("Labor", "Coalition", "Greens", "Other"))) %>%
  pivot_wider(names_from = source, values_from = prop) %>%
  select(party, `2022 Election`, `2025 Election`, `Polling averages`,
         `Sample - Unweighted`, `AES - Unweighted`, `AES - Weighted`) %>%
  mutate(across(where(is.numeric), \(x) sprintf("%.2f", x))) %>%
  mutate(across(where(is.character), \(x) replace(x, x == "NA", "-")))

## Export Table S2
party_tab %>%
  xtable() %>%
  print(
    include.rownames       = FALSE,
    include.colnames       = FALSE,
    only.contents          = TRUE,
    type                   = "latex",
    sanitize.text.function = identity,
    comment                = FALSE,
    hline.after            = NULL,
    file = paste0(tab_dir, "table_s2.tex")
  )

## ---- Table S3: GCCSA regional distribution ----

aus_gcc <-
  combined_dat %>%
  filter(sample == "AU") %>%
  harvest(., target = aus_target["x_gcc"])

diagnosed_gcc <-
  aus_gcc %>%
  diagnose_weights(target = aus_target["x_gcc"])

gcc_tab <-
  diagnosed_gcc %>%
  select(variable, level, target, prop_original, error_original) %>%
  mutate(across(where(is.numeric), \(x) sprintf("%.2f", x))) %>%
  mutate(across(where(is.character), \(x) replace(x, x == "NA", "-")))

## Export Table S3
gcc_tab %>%
  select(-variable) %>%
  xtable() %>%
  print(
    include.rownames       = FALSE,
    include.colnames       = FALSE,
    only.contents          = TRUE,
    type                   = "latex",
    sanitize.text.function = identity,
    comment                = FALSE,
    hline.after            = NULL,
    file = paste0(tab_dir, "table_s3.tex")
  )

## ---- In-text estimates: Section S1.1 (Australia) ----

## Mean absolute errors: sample vs. AES
cat("AU sample mean absolute error (Section S1.1):\n")
print(aus_demos %>%
        summarise(error_uw = mean(error_original, na.rm = TRUE),
                  error_w  = mean(error_weighted, na.rm = TRUE))) 

cat("AES mean absolute error (Section S1.1):\n")
print(aes_demos %>%
        summarise(error_uw = mean(error_original, na.rm = TRUE),
                  error_w  = mean(error_weighted, na.rm = TRUE))) 

## AES design effect and effective sample size
## ("the effective sample size is reduced by more than half (from 2,508 to 1,230; design effect = 2.04)")
cat("AES design effect and ESS (Section S1.1):\n")
cat("Design effect:", round(design_effect(aes_dat$weights), 2), "\n")
cat("Effective N:", round(effective_sample_size(aes_dat$weights), 0), "\n")
cat("Nominal N:", nrow(aes_dat), "\n")

## AU sample design effect and effective sample size after weighting
## ("the effective sample size drops from 1,819 to 1,471; design effect = 1.24")
cat("AU sample design effect and ESS after weighting (Section S1.1):\n")
cat("Design effect:", round(design_effect(aus_weighted$weights), 2), "\n")
cat("Effective N:", round(effective_sample_size(aus_weighted$weights), 0), "\n")
cat("Nominal N:", nrow(aus_weighted), "\n")

## AES income: proportion above $200k (unweighted and weighted)
## ("13% of the unweighted AES sample (12.5% weighted) report incomes above $200,000")
cat("AES income above $200k (Section S1.1):\n")
print(aes_dat %>%
        filter(!is.na(x_hhi_aes)) %>%
        mutate(x_hhi_aes = fct_recode(x_hhi_aes, "$200,000+" = "> $250,000",
                                      "$200,000+" = "$200,001 - $250,000")) %>% 
        group_by(x_hhi_aes) %>%
        summarise(
          prop_uw = n() / nrow(filter(aes_dat, !is.na(x_hhi_aes))),
          prop_w  = sum(weights, na.rm = TRUE) /
            sum(filter(aes_dat, !is.na(x_hhi_aes))$weights, na.rm = TRUE)
        )) 

## In-text: party proportions in sample vs. polling averages and elections
## ("41% in unweighted sample vs. 35% in polling and 2025 election"; "31% vs. 32-34%")
cat("AU party support comparison (Section S1.1):\n")
print(party_tab)


## In-text: "74% of our sample resides in a major urban area (100,000 or more),
##           compared to 72% of the Australian population"
## Note: 72% is the ABS population benchmark.
cat("AU proportion in major urban area (100,000+) from x_ssr (Section S1.1):\n")
print(combined_dat %>%
        filter(sample == "AU", !is.na(x_ssr)) %>%
        mutate(urban_100k = x_ssr %in% c("100,000 to 999,999", "1 million or more")) %>%
        summarise(prop_urban = mean(urban_100k)))

#### ============== United States ============== ####

usa_target <- read_rds("usa_target.rds")
anes_dat   <- read_rds("anes_2024.rds")
ces_dat    <- read_rds("ces_2024.rds")

usa_covs <-
  c("x_gender_f", "x_age_cat", "x_ethnicity", "x_region", "x_edu_college",
    "x_native", "x_employ_binary", "x_hhi")

ces_covs  <- usa_covs
anes_covs <-
  c("x_gender_f", "x_age_cat", "x_ethnicity", "x_region", "x_edu_college",
    "x_native", "x_employ_binary")

## Generate survey weights for US sample
usa_weighted <-
  combined_dat %>%
  filter(sample == "US") %>%
  mutate(
    x_pid_comb2 = factor(x_pid_comb2, levels = c("Left", "Independent", "Right"))
  ) %>%
  harvest(., target = usa_target[usa_covs])

diagnosed_usa  <- usa_weighted %>% diagnose_weights(target = usa_target[usa_covs])
diagnosed_ces  <- ces_dat  %>% diagnose_weights(target = usa_target[ces_covs])
diagnosed_anes <- anes_dat %>% diagnose_weights(target = usa_target[anes_covs])

## ---- Partisanship ----

usa_pid <-
  usa_weighted %>%
  filter(!is.na(x_pid_3)) %>%
  count(x_pid_3) %>%
  mutate(prop_original = n / sum(n))

usa_pid <-
  usa_weighted %>%
  filter(!is.na(x_pid_3)) %>%
  mutate(x_pid_3 = factor(x_pid_3, levels = c("Republican", "Independent", "Democrat"))) %>%
  group_by(x_pid_3) %>%
  summarise(weighted_n = sum(weights, na.rm = TRUE)) %>%
  mutate(prop_weighted = weighted_n / sum(weighted_n)) %>%
  left_join(usa_pid, by = join_by(x_pid_3)) %>%
  mutate(variable = "x_pid_3", target = NA,
         error_original = NA, error_weighted = NA) %>%
  rename(level = x_pid_3) %>%
  select(variable, level, prop_original, prop_weighted, target,
         error_original, error_weighted)

anes_pid <-
  anes_dat %>%
  filter(!is.na(x_pid_3)) %>%
  count(x_pid_3) %>%
  mutate(prop_original = n / sum(n))

anes_pid <-
  anes_dat %>%
  filter(!is.na(x_pid_3)) %>%
  mutate(x_pid_3 = factor(x_pid_3, levels = c("Republican", "Independent", "Democrat"))) %>%
  group_by(x_pid_3) %>%
  summarise(weighted_n = sum(weights, na.rm = TRUE)) %>%
  mutate(prop_weighted = weighted_n / sum(weighted_n)) %>%
  left_join(anes_pid, by = join_by(x_pid_3)) %>%
  mutate(variable = "x_pid_3", target = NA,
         error_original = NA, error_weighted = NA) %>%
  rename(level = x_pid_3) %>%
  select(variable, level, prop_original, prop_weighted, target,
         error_original, error_weighted)

ces_pid <-
  ces_dat %>%
  filter(!is.na(x_pid_3)) %>%
  count(x_pid_3) %>%
  mutate(prop_original = n / sum(n))

ces_pid <-
  ces_dat %>%
  filter(!is.na(x_pid_3)) %>%
  mutate(x_pid_3 = factor(x_pid_3, levels = c("Republican", "Independent", "Democrat"))) %>%
  group_by(x_pid_3) %>%
  summarise(weighted_n = sum(weights, na.rm = TRUE)) %>%
  mutate(prop_weighted = weighted_n / sum(weighted_n)) %>%
  left_join(ces_pid, by = join_by(x_pid_3)) %>%
  mutate(variable = "x_pid_3", target = NA,
         error_original = NA, error_weighted = NA) %>%
  rename(level = x_pid_3) %>%
  select(variable, level, prop_original, prop_weighted, target,
         error_original, error_weighted)

## ---- Table S4: Combine and export ----

usa_demos <-
  bind_rows(diagnosed_usa, usa_pid) %>%
  mutate(source = "sample") %>%
  group_by(variable) %>%
  mutate(
    prop_original  = normalize(prop_original),
    prop_weighted  = normalize(prop_weighted),
    error_original = abs(target - prop_original),
    error_weighted = abs(target - prop_weighted)
  ) %>%
  ungroup()

anes_demos <-
  bind_rows(diagnosed_anes, anes_pid) %>%
  mutate(source = "ANES") %>%
  group_by(variable) %>%
  mutate(
    prop_original  = normalize(prop_original),
    prop_weighted  = normalize(prop_weighted),
    error_original = abs(target - prop_original),
    error_weighted = abs(target - prop_weighted)
  ) %>%
  ungroup()

ces_demos <-
  bind_rows(diagnosed_ces, ces_pid) %>%
  mutate(source = "CES") %>%
  group_by(variable) %>%
  mutate(
    prop_original  = normalize(prop_original),
    prop_weighted  = normalize(prop_weighted),
    error_original = abs(target - prop_original),
    error_weighted = abs(target - prop_weighted)
  ) %>%
  ungroup()

## Export Table S4
usa_tab <-
  bind_rows(usa_demos, ces_demos, anes_demos) %>%
  pivot_wider(
    names_from  = source,
    values_from = c(prop_original, prop_weighted, error_original, error_weighted)
  ) %>%
  select(variable, level, target,
         prop_original_sample, error_original_sample,
         prop_original_CES,  error_original_CES,  prop_weighted_CES,  error_weighted_CES,
         prop_original_ANES, error_original_ANES, prop_weighted_ANES, error_weighted_ANES) %>%
  mutate(
    level = case_when(
      variable %in% bin_vars & level == "0" ~ "No",
      variable %in% bin_vars & level == "1" ~ "Yes",
      level == "< $30,000"           ~ "Less than \\$30,000",
      level == "$30,000 - $49,999"   ~ "\\$30,000-\\$49,999",
      level == "$50,000 - $79,999"   ~ "\\$50,000-\\$79,999",
      level == "$80,000 - $99,999"   ~ "\\$80,000-\\$99,999",
      level == "$100,000 - $149,999" ~ "\\$100,000-\\$149,999",
      level == "$150,000 - $199,999" ~ "\\$150,000-\\$199,999",
      level == "$200,000 +"          ~ "\\$200,000 or more",
      level == "Latino"              ~ "Hispanic/Latino",
      level == "Asian"               ~ "AAPI",
      level == "Native"              ~ "AIAN",
      variable == "x_pid_3" & level == "Democrat"   ~ "Left (Democrat)",
      variable == "x_pid_3" & level == "Republican" ~ "Right (Republican)",
      .default = as.character(level)
    )
  ) %>%
  mutate(across(where(is.numeric), \(x) sprintf("%.2f", x))) %>%
  mutate(across(where(is.character), \(x) replace(x, x == "NA", "-")))

## Reorder PID rows to match SI display order: Democrat → Independent → Republican
pid_idx   <- which(usa_tab$variable == "x_pid_3")
other_idx <- setdiff(seq_len(nrow(usa_tab)), pid_idx)
usa_tab   <- usa_tab[c(other_idx, rev(pid_idx)), ]

usa_tab %>%
  select(-variable) %>%
  xtable() %>%
  print(
    include.rownames       = FALSE,
    include.colnames       = FALSE,
    only.contents          = TRUE,
    type                   = "latex",
    sanitize.text.function = identity,
    comment                = FALSE,
    hline.after            = NULL,
    file = paste0(tab_dir, "table_s4.tex")
  )

## ---- Table S5: 7-point PID comparison ----
usa_party <-
  usa_weighted %>%
  filter(!is.na(x_pid_7)) %>%
  count(x_pid_7) %>%
  mutate(prop = n / sum(n), source = "Sample - Unweighted") %>%
  select(-n)

ces_party_uw <-
  ces_dat %>%
  filter(!is.na(x_pid_7)) %>%
  count(x_pid_7) %>%
  mutate(prop = n / sum(n), source = "CES - Unweighted") %>%
  select(-n)

anes_party_uw <-
  anes_dat %>%
  filter(!is.na(x_pid_7)) %>%
  count(x_pid_7) %>%
  mutate(prop = n / sum(n), source = "ANES - Unweighted") %>%
  select(-n)

ces_party_w <-
  ces_dat %>%
  filter(!is.na(x_pid_7)) %>%
  group_by(x_pid_7) %>%
  summarise(weighted_n = sum(weights, na.rm = TRUE)) %>%
  mutate(prop = weighted_n / sum(weighted_n), source = "CES - Weighted") %>%
  select(-weighted_n)

anes_party_w <-
  anes_dat %>%
  filter(!is.na(x_pid_7)) %>%
  group_by(x_pid_7) %>%
  summarise(weighted_n = sum(weights, na.rm = TRUE)) %>%
  mutate(prop = weighted_n / sum(weighted_n), source = "ANES - Weighted") %>%
  select(-weighted_n)

pid7_tab <-
  bind_rows(usa_party, ces_party_uw, ces_party_w, anes_party_uw, anes_party_w) %>%
  pivot_wider(names_from = source, values_from = prop) %>%
  select(x_pid_7, `Sample - Unweighted`, `CES - Unweighted`, `CES - Weighted`,
         `ANES - Unweighted`, `ANES - Weighted`) %>%
  mutate(across(where(is.numeric), \(x) sprintf("%.2f", x))) %>%
  mutate(across(where(is.character), \(x) replace(x, x == "NA", "-")))

## Export Table S5
pid7_tab %>%
  xtable() %>%
  print(
    include.rownames       = FALSE,
    include.colnames       = FALSE,
    only.contents          = TRUE,
    type                   = "latex",
    sanitize.text.function = identity,
    comment                = FALSE,
    hline.after            = NULL,
    file = paste0(tab_dir, "table_s5.tex")
  )

## ---- In-text estimates: Section S1.2 (United States) ----

## Mean absolute errors: sample vs. CES vs. ANES
cat("US sample mean absolute error (Section S1.2):\n")
print(usa_demos %>%
        summarise(error_uw = mean(error_original, na.rm = TRUE),
                  error_w  = mean(error_weighted, na.rm = TRUE)))

cat("CES mean absolute error (Section S1.2):\n")
print(ces_demos %>%
        summarise(error_uw = mean(error_original, na.rm = TRUE),
                  error_w  = mean(error_weighted, na.rm = TRUE)))

cat("ANES mean absolute error (Section S1.2):\n")
print(anes_demos %>%
        summarise(error_uw = mean(error_original, na.rm = TRUE),
                  error_w  = mean(error_weighted, na.rm = TRUE)))

## CES and ANES design effects and effective sample sizes
## ("reduced by 68% in CES (from 60,000 to 19,178; design effect = 3.13)")
## ("49% in ANES (5,520 to 2,816; design effect = 1.96)")
cat("CES design effect and ESS (Section S1.2):\n")
cat("Design effect:", round(design_effect(ces_dat$weights), 2), "\n")
cat("Effective N:", round(effective_sample_size(ces_dat$weights), 0), "\n")
cat("Nominal N:", nrow(ces_dat), "\n")

cat("ANES design effect and ESS (Section S1.2):\n")
cat("Design effect:", round(design_effect(anes_dat$weights), 2), "\n")
cat("Effective N:", round(effective_sample_size(anes_dat$weights), 0), "\n")
cat("Nominal N:", nrow(anes_dat), "\n")

## US sample design effect and effective sample size after weighting
## ("reduction in effective sample size from 1,819 to 1,303; design effect = 1.40")
cat("US sample design effect and ESS after weighting (Section S1.2):\n")
cat("Design effect:", round(design_effect(usa_weighted$weights), 2), "\n")
cat("Effective N:", round(effective_sample_size(usa_weighted$weights), 0), "\n")
cat("Nominal N:", nrow(usa_weighted), "\n")

## 3-point PID proportions: sample, CES, ANES (unweighted and weighted)
## ("39% Democrats, 30% Independents, 31% Republicans" in sample;
##  "39% Democrat, 34% Independent, 27% Republican" CES unweighted;
##  "35% Democrat, 33% Independent, 32% Republican" ANES unweighted)
cat("3-point PID: sample, CES, ANES (Section S1.2):\n")
print(bind_rows(
  usa_pid  %>% mutate(source = "Sample"),
  ces_pid  %>% mutate(source = "CES"),
  anes_pid %>% mutate(source = "ANES")) %>%
  select(source, level, prop_original, prop_weighted))

## In-text: 7-point PID proportions
## ("25% Strong Democrats vs. 28% CES and 24% ANES; 16% Strong Republicans;
##   8% pure Independents vs. 13% CES and 7% ANES")
cat("7-point PID comparison (Section S1.2):\n")
print(pid7_tab)


## ---- Session Info -----------------------------------------------------------
sessionInfo()